***********************************************************************************************
* This command file contains the spatial demand model estimator for real-estate transactions. *
***********************************************************************************************

clear mata
set more off
set seed 1234
set trace off

//COMPONENT 1: COMMAND SHELL IN STATA (see spatial_demand_Q for details)
program define spatialdemand_logQ1, eclass sortpreserve
    version 14
//Section 1: Defining the syntax of the program and splitting the data into dependent variables
    syntax varlist(numeric ts fv) [if] [in] [, noCONStant]
    marksample touse
    tempname b V N rank df_r X
	    gettoken firmid remains1 : varlist
    gettoken depvar remains2 : remains1
    gettoken marketid remains3 : remains2
    gettoken marketsize cnames : remains3
//Section 2: Calling the mata program
    mata: mywork_logQ2("`varlist'", "`touse'", "`constant'", "`b'", "`V'", "`N'", "`rank'", "`df_r'") 
//Section 3: Based on the output of the mata program, we can then store some information which is necessary for the post-estimation results.
    if "`constant'" == "" { //`constant' includes whatever is specified in the noconstant option. If this is empty, then we should include a constant.
    	local cnames `cnames' _cons nest //If the user has not specified that they don't want to have a constant, then we need to add a constant to the list on factor variables.
    }
    matrix colnames `b' = `cnames'
    matrix colnames `V' = `cnames'
    matrix rownames `V' = `cnames'
    ereturn post `b' `V', esample(`touse')
    ereturn scalar N       = `N'
    ereturn scalar rank    = `rank'
    ereturn scalar df_r    = `df_r'
    ereturn local  cmd     "spatial demand regression"
    ereturn display
end


//COMPONENT 2: SPATIAL DEMAND ESTIMATES IN MATA (see spatial_demand_Q for details)
mata:
 numeric matrix spdemand_log(numeric matrix X, numeric matrix beta) //Here we specify what kind of arguments we want to be fed into the function.
{
numeric matrix demand //This defines the result as a numeric matrix called demand.
nest_parameter=1/(1+exp(-beta[rows(beta)])) //The nest parameter is transformed in order to be strictly within the 0-1 interval.
firm_utility=exp((X[,5..cols(X)]*beta[1..(rows(beta)-1)])/nest_parameter) //Utility (nx notary-market combinations x 1)
info = panelsetup(X, 3) //Each row of info corresponds to a group based on the market ID.
 //The first element of the row shows the observation id of the first element of the group.
 //The second element of the row shows the observation id of the last element of the group.
n_markets   = rows(info) //Number of markets
nest_multiplier= J(n_markets, 1, 0) //nm x 1 matrix where we will store the matrix we need for computing clustered standard errors
prob=J(rows(X),1,0) //this is vector with rows equal to the number of notary-market combinations
for(i=1; i<=n_markets; i++) { //i goes over the markets
     market_utility = panelsubmatrix(firm_utility,i,info) //Use the i-th row of the info matrix to cut out the group-specific rows of the utility matrix. This selects all firm utilities with-in a given market.
     nest_utility=sum(market_utility) //Sum all firm utilities with-in a given market
	 nest_multiplier[i,1]=((nest_utility)^(nest_parameter-1))/(1+nest_utility^nest_parameter) //NOTE: this is not the probability that the consumer will purchase notary services, since we use "nest_parameter-1" in the exponential in the numerator. We do this to spare ourselves a division by the sum of all notary utilities later.
	 k1=info[i,2] //This is the observation number of the last element in the group
  if (i==1) {
                 k0=1 //This is the observation number of the first element of the group, if we are looking at the first group.
				 nrep=k1 //This is the observation number of the last element in the group
		}
         else {
                 k0=1+info[i-1,2] //This is the observation number of the first element of the group (it is the number of the last element of the previous group + 1).
				 nrep=k1-info[i-1,2] //This is the total number of observations within the group	
				 }
prob[k0..k1]=firm_utility[k0..k1]:*J(nrep,1,nest_multiplier[i,1]) //This is the probability that the consumers from market i purchase from a specific notary.
}
X_res=X,prob //Here we concatinate the probabilities to the vector of X. We rename the vector X_res, mainly to avoid overwriting the original matrix, which may be referred to by other code later on in the analysis or will be necessary if we loop over this again.
demand_per_market=X_res[,cols(X_res)]:*X_res[,4] //The probability is in the last column of X_res. This is multiplied by the market size (which is in the fourth column).
X_res=X_res,demand_per_market //Here we concatinate the expected sales per notary per market to the vector of X_res.
X_res=sort(X_res,1) //Sort by firm_ID
info2 = panelsetup(X_res, 1) //Generate groups based on firm IDs (these are the markets served by a specific notary).
n_firms   = rows(info2) //Number of notaries
demand=J(n_firms,3,0) //Generate the demand matrix, which contains rows equivalent to a specific notary. The first column consists of the ID of the notary. The second colum contains the log estimated demand per notary. The third column contains the log observed demand per notary.
for(i=1; i<=n_firms; i++) { //i goes over the firms
demand_not = panelsubmatrix(X_res[,cols(X_res)],i,info2) //demand_not is the portion of the vector of estimated demands per market that corresponds to notary i.
id_not = panelsubmatrix(X_res[,1],i,info2) //id_not is the portion of the vector of firm IDs (saved as the first column of matrix X) that contains the ID of notary i (these should be identical values).
real_demand_not = panelsubmatrix(X_res[,2],i,info2) //real_demand_not is the portion of the vector of sales that corresponds to the actual sales of notary i (again, the values should be identical across markets).
demand[i,1]=mean(id_not) //The first column of the demand matrix contains the IDs of the firms.
demand[i,2]=log(sum(demand_not)) //The second column of the demand matrix contains the log of the sum of market-level demands of the notary.
demand[i,3]=log(mean(real_demand_not)) //The third column contains the log  of the mean observed demand. Since we do not observe market-specific demand per notary, this is just the mean of a vector of identical demand values (at the notary level).
}
return(demand)
}
end

//COMPONENT 3: OPTIMIZATION SETTINGS IN MATA (see spatial_demand_Q for details)
//This defines how the spatial demand will be optimized. 
mata:

void mywork_logQ2( string scalar vars, 
             string scalar touse,   string scalar constant, 
	     string scalar bname,   string scalar Vname,     
	     string scalar nname,   string scalar rname,     
	     string scalar dfrname) 
{
    real vector b
    real matrix X, V
    real scalar n, k

    X    = st_data(., vars, touse) //This selects the corresponding control variables.
    nx    = rows(X)
    if (constant == "") { //This states that a constant should be added.
        X    = X,J(nx,1,1)
    }
//These are the parameters that we converge to when we start with zero values:
initial_values=(-.01991319,     .05862834,    -.00118385,    -.04633767,     12.071074,      6.859347,.81446984,    -.15941382,    -13.513698,    -2.1236837)
S  = optimize_init()
optimize_init_argument(S, 1, X)
optimize_init_technique(S,"bfgs")
rseed(1234)
optimize_init_conv_maxiter(S,200)
optimize_init_evaluator(S, &demandeval_log()) //The evaluator function is called plleval, store this information into the object S, which contains the address of the revelant information for the evaluator.
optimize_init_params(S, initial_values) 
b = optimize(S)
b=b'
V    = optimize_result_V_oim(S) //Variance matrix
k = cols(X) - 3 - diag0cnt(invsym(V)) //Rank of the variance matrix
n = rows(uniqrows(X[,1]))

    st_matrix(bname, b') //Since we specify in the function above that bname is actually `b', the estimates will be saved in the Stata local `b'. 
    st_matrix(Vname, V)
    st_numscalar(nname, n)
    st_numscalar(rname, k)
    st_numscalar(dfrname, n-k)

}
end

//COMPONENT 4: FUNCTION TO BE OPTIMIZED IN MATA (see spatial_demand_Q for details)
//The evaluator maximizes, which is why we calculate the negative of the sum of squared errors.
mata:
void demandeval_log(real scalar todo, real vector b, real matrix X, val, grad, hess)
 {
    real matrix demand
    demand = spdemand_log(X,b')
	n_firms=rows(demand)
    val = -sum((demand[,3]-demand[,2]):^2)
 }

end


